* Session setup.
* Install the blindschemes package only when its plotplain scheme file is
* not already on the adopath. Installing unconditionally contacts SSC on
* every run and stops the script when no network connection is available.
capture findfile scheme-plotplain.scheme
if _rc {
	ssc install blindschemes
}
set scheme plotplain

* Start from an empty session and disable output paging.
clear all
set more off
* Project directory tree. The Master global is not defined in this section
* and must already be set before this point.
global NSS6110 "$Master\NSS_61_10"

* Sub-folders of the project root.
global do_files "$NSS6110\do_files"
global concordances "$NSS6110\concordances"
global results "$NSS6110\results"
global raw "$NSS6110\raw"
global data "$NSS6110\data"

* Create the output folders. capture suppresses the error mkdir raises,
* for example when the folder already exists.
foreach folder in "$concordances" "$results" "$data" {
	capture mkdir "`folder'"
}

* Load the Block 4 file and attach the Block 5.1 file, matching records
* one-to-one on PID. No keep() option is given, so unmatched records from
* both files stay in memory and the _merge indicator is left in the data.
use "$raw\Block_4_level_03", clear
merge 1:1 PID using "$raw\Block_5pt1_level_04"

* Convert the general-education code to numeric and give it a short name.
destring General_education, replace
rename General_education gen_edu

#delimit ;
* Value labels for the raw general-education codes. The code list changes
  between survey rounds, so check it against the round being processed *;
label define gen_edu
	1           "Not literate"
	2           "Literate through Non-formal Education Courses or Adult Education Centre"
	3           "Literate through Total Literacy Campaign"
	4           "Literate through other means"
	5			"Literate but below primary"
	6			"Primary"
	7			"Middle"
	8			"Secondary"
	10			"Higher Secondary"
	11			"Diploma/Certificate Course"
	12			"Graduate"
	13			"Postgraduate and above"
	;
label values gen_edu gen_edu;


* Generate a recoded educ variable representing years of schooling.
  Every code that is not matched by a rule below keeps the starting value 0 *;
gen educ_recode = 0;
replace educ_recode = 1  if gen_edu>=2 & gen_edu<=5;
replace educ_recode = 5  if gen_edu==6;
replace educ_recode = 8  if gen_edu==7;
replace educ_recode = 10 if gen_edu==8;
replace educ_recode = 12 if gen_edu==10;

* Missing values compare greater than every number in Stata, so the open
  ended top bracket has to exclude them explicitly. All missing codes, not
  only the system missing value, are then carried through as missing *;
replace educ_recode = 15 if gen_edu>=11 & !missing(gen_edu);
replace educ_recode = .  if missing(gen_edu);

* Value labels for the recoded variable *;
label define educ_recode
	0          "Not literate"
	1          "Literate without primary"
	5          "Primary"
	8          "Middle"
	10			"Secondary"
	12			"Higher Secondary"
	15			"Graduate"
	;
label values educ_recode educ_recode;

#delimit cr

* Keep only observations with a non-missing education recode.
keep if educ_recode != .

* State code: integer part of State_region divided by 10.
destring State_region, replace
generate state = floor(State_region/10)
sort state

* Two-digit industry code taken from the first two characters of the
* trimmed NIC code of the usual principal activity. Only codes 15 through
* 36 are kept (presumably the manufacturing divisions - confirm against the
* NIC version used in this round).
generate pr_nic_2d = real(substr(trim(Usual_principal_activity_NIC_5_d), 1, 2))
keep if inrange(pr_nic_2d, 15, 36)

* Combined weight: MLTSS/100 where NSC equals 1, MLTSS/200 otherwise.
* NOTE(review): verify this rule against the survey's weighting
* documentation for the round being processed.
generate mult_comb = cond(NSC == 1, MLTSS/100, MLTSS/200)

* Share of each education level in the weighted total *
* pop      : weighted total over all observations in memory
* pop_educ : weighted total within each educ_recode level
* All three variables are stored as double. Leaving pop_educ and sh_educ at
* the default float type would round the weighted totals and the resulting
* shares to about seven significant digits while pop itself is double.
* total() is the documented name of the egen function formerly called sum().
egen double pop = total(mult_comb)
egen double pop_educ = total(mult_comb), by(educ_recode)
gen double sh_educ = pop_educ/pop
tabulate educ_recode, summarize(sh_educ)

* Share of different educ levels in each size category *
rename No_of_workers_in_enterprises no_wrk_code
destring no_wrk_code, replace
* Observations without an enterprise-size code cannot be assigned to a
* size category. missing() also covers extended missing values.
drop if missing(no_wrk_code)

* Weighted totals per size category and per (size category, education
* level) cell. Stored as double so the totals and the shares derived from
* them are not rounded to float precision. total() is the documented name
* of the egen function formerly called sum().
egen double pop_size = total(mult_comb), by(no_wrk_code)
egen double pop_size_educ = total(mult_comb), by(no_wrk_code educ_recode)
gen double sh_size_educ = pop_size_educ/pop_size

* Flag one observation per cell so each share is counted once in the check.
egen tag_size_educ = tag(no_wrk_code educ_recode)
sort no_wrk_code educ_recode

* Check: within each size category the cell shares should add up to 1, so
* the summary of test2 should show a mean, minimum and maximum of 1.
egen double test2 = total(sh_size_educ) if tag_size_educ==1, by(no_wrk_code)
summarize test2
tabulate educ_recode no_wrk_code, summarize(sh_size_educ)


* Reduce the data to one row per (education level, size category) cell.
egen tag_educ_size = tag(educ_recode no_wrk_code)
drop if tag_educ_size != 1
keep educ_recode no_wrk_code sh_size_educ

* One row per size category, one share column per education level.
reshape wide sh_size_educ, i(no_wrk_code) j(educ_recode)

* Replace the numeric suffixes with readable names (suffix = educ_recode value).
rename (sh_size_educ0 sh_size_educ1 sh_size_educ5 sh_size_educ8 sh_size_educ10 sh_size_educ12 sh_size_educ15) ///
       (not literate primary middle secondary higher graduate)

* Size code 9 is excluded from the table.
keep if no_wrk_code != 9

* Combine the education-level shares into four broader groups. rowtotal()
* treats a missing share as zero.
egen noschool    = rowtotal(not literate)
egen Grade1_9    = rowtotal(primary middle)
egen Grade10_12  = rowtotal(secondary higher)
egen Grade12plus = rowtotal(graduate)

keep no_wrk_code noschool Grade1_9 Grade10_12 Grade12plus

* Text description of each size code. Codes other than 1 to 4 keep ".".
local size_lbl1 "L less than or equal to 5"
local size_lbl2 "L greater than 5 but less than or equal to 10"
local size_lbl3 "L greater than 10 but less than or equal to 20"
local size_lbl4 "L greater than 20"

generate Label_firm_size = "."
forvalues code = 1/4 {
	replace Label_firm_size = "`size_lbl`code''" if no_wrk_code == `code'
}

* Write the table to the results folder, overwriting any earlier version.
save "$results\T9_size_educ.dta", replace